{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "source": [
    "# 参数管理\n",
    "\n",
    "我们首先关注具有单隐藏层的多层感知机"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "origin_pos": 2,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[2, 1], dtype=Float32, value=\n",
       "[[-3.49030714e-04],\n",
       " [-5.60830755e-04]])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from d2l import mindspore as d2l\n",
    "from mindspore import nn\n",
    "\n",
    "net = nn.SequentialCell([nn.Dense(4, 8), nn.ReLU(), nn.Dense(8, 1)])\n",
    "X = d2l.rand((2, 4))\n",
    "net(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "参数访问"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "origin_pos": 6,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([('2.weight', Parameter (name=2.weight, shape=(1, 8), dtype=Float32, requires_grad=True)), ('2.bias', Parameter (name=2.bias, shape=(1,), dtype=Float32, requires_grad=True))])\n"
     ]
    }
   ],
   "source": [
    "print(net[2].parameters_dict())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "目标参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "origin_pos": 10,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'mindspore.common.parameter.ParameterTensor'>\n",
      "Parameter (name=2.bias, shape=(1,), dtype=Float32, requires_grad=True)\n",
      "Parameter (name=2.bias, shape=(1,), dtype=Float32, requires_grad=True)\n"
     ]
    }
   ],
   "source": [
    "print(type(net[2].bias))\n",
    "print(net[2].bias)\n",
    "print(net[2].bias.value())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "一次性访问所有参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "origin_pos": 17,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('0.weight', (8, 4)) ('0.bias', (8,))\n",
      "('0.weight', (8, 4)) ('0.bias', (8,)) ('2.weight', (1, 8)) ('2.bias', (1,))\n"
     ]
    }
   ],
   "source": [
    "print(*[(name, param.shape) for name, param in net[0].parameters_dict().items()])\n",
    "print(*[(name, param.shape) for name, param in net.parameters_dict().items()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "origin_pos": 21,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter (name=2.bias, shape=(1,), dtype=Float32, requires_grad=True)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net.parameters_dict()['2.bias'].value()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "从嵌套块收集参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "origin_pos": 25,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[2, 1], dtype=Float32, value=\n",
       "[[-3.91725408e-17],\n",
       " [-2.82850341e-17]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def block1():\n",
    "    return nn.SequentialCell([nn.Dense(4, 8), nn.ReLU(),\n",
    "                              nn.Dense(8, 4), nn.ReLU()])\n",
    "\n",
    "def block2():\n",
    "    net = nn.SequentialCell()\n",
    "    for i in range(4):\n",
    "        # 在这里嵌套\n",
    "        net.append(block1())\n",
    "    return net\n",
    "\n",
    "rgnet = nn.SequentialCell([block2(), nn.Dense(4, 1)])\n",
    "rgnet(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "我们已经设计了网络，让我们看看它是如何组织的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "origin_pos": 29,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SequentialCell<\n",
      "  (0): SequentialCell<\n",
      "    (0): SequentialCell<\n",
      "      (0): Dense<input_channels=4, output_channels=8, has_bias=True>\n",
      "      (1): ReLU<>\n",
      "      (2): Dense<input_channels=8, output_channels=4, has_bias=True>\n",
      "      (3): ReLU<>\n",
      "      >\n",
      "    (1): SequentialCell<\n",
      "      (0): Dense<input_channels=4, output_channels=8, has_bias=True>\n",
      "      (1): ReLU<>\n",
      "      (2): Dense<input_channels=8, output_channels=4, has_bias=True>\n",
      "      (3): ReLU<>\n",
      "      >\n",
      "    (2): SequentialCell<\n",
      "      (0): Dense<input_channels=4, output_channels=8, has_bias=True>\n",
      "      (1): ReLU<>\n",
      "      (2): Dense<input_channels=8, output_channels=4, has_bias=True>\n",
      "      (3): ReLU<>\n",
      "      >\n",
      "    (3): SequentialCell<\n",
      "      (0): Dense<input_channels=4, output_channels=8, has_bias=True>\n",
      "      (1): ReLU<>\n",
      "      (2): Dense<input_channels=8, output_channels=4, has_bias=True>\n",
      "      (3): ReLU<>\n",
      "      >\n",
      "    >\n",
      "  (1): Dense<input_channels=4, output_channels=1, has_bias=True>\n",
      "  >\n"
     ]
    }
   ],
   "source": [
    "print(rgnet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "origin_pos": 33,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter (name=0.1.0.bias, shape=(8,), dtype=Float32, requires_grad=True)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rgnet[0][1][0].bias.value()"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "默认情况下，MindSpore会使用Normal初始化权重矩阵，\n",
    "偏置参数设置为0。\n",
    "MindSpore的`common.initializer`模块中提供了各种初始化方法。"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "内置初始化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "origin_pos": 41,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Tensor(shape=[4], dtype=Float32, value= [-1.20215854e-02, -1.25061749e-02, -7.52139417e-03,  1.09155132e-02]),\n",
       " Tensor(shape=[], dtype=Float32, value= 0))"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = nn.SequentialCell([nn.Dense(4, 8, weight_init='normal', bias_init='zero'),\n",
    "                         nn.ReLU(),\n",
    "                         nn.Dense(8, 1, weight_init='normal', bias_init='zero')])\n",
    "\n",
    "net[0].weight.data[0], net[0].bias.data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "origin_pos": 45,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Tensor(shape=[4], dtype=Float32, value= [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,  1.00000000e+00]),\n",
       " Tensor(shape=[], dtype=Float32, value= 0))"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net = nn.SequentialCell([nn.Dense(4, 8, weight_init='one', bias_init='zero'),\n",
    "                         nn.ReLU(),\n",
    "                         nn.Dense(8, 1, weight_init='one', bias_init='zero')])\n",
    "\n",
    "net[0].weight.data[0], net[0].bias.data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "对某些块应用不同的初始化方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "origin_pos": 49,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.23717844 0.39029706 0.04387231 0.58519274]\n",
      "[42. 42. 42. 42. 42. 42. 42. 42.]\n"
     ]
    }
   ],
   "source": [
    "net = nn.SequentialCell([nn.Dense(4, 8, weight_init='xavier_uniform'),\n",
    "                         nn.ReLU(),\n",
    "                         nn.Dense(8, 1, weight_init=42)])\n",
    "\n",
    "print(net[0].weight.data[0])\n",
    "print(net[2].weight.data[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "自定义初始化\n",
    "同样，我们实现了一个`my_init`函数来应用到`net`。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "origin_pos": 56,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[2, 4], dtype=Float32, value=\n",
       "[[ 5.15057087e+00, -0.00000000e+00,  6.03760958e+00, -8.03363037e+00],\n",
       " [-5.74584198e+00,  0.00000000e+00, -0.00000000e+00,  7.52724266e+00]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def my_init(shape):\n",
    "    weight = d2l.uniform(shape, -10, 10)\n",
    "    weight *= d2l.abs(weight) >= 5\n",
    "    return weight\n",
    "\n",
    "\n",
    "net = nn.SequentialCell([nn.Dense(4, 8, weight_init=my_init((8, 4))),\n",
    "                         nn.ReLU(),\n",
    "                         nn.Dense(8, 1, weight_init=my_init((1, 8)))])\n",
    "net[0].weight[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "origin_pos": 60,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[WARNING] KERNEL(4095486,7fc4402bd740,python):2021-11-07-23:27:27.742.999 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4095486,7fc4402bd740,python):2021-11-07-23:27:27.743.111 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[4], dtype=Float32, value= [ 4.20000000e+01,  1.00000000e+00,  7.03760958e+00, -7.03363037e+00])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "net[0].weight.data[:] += 1\n",
    "net[0].weight.data[0, 0] = 42\n",
    "net[0].weight.data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "参数绑定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "origin_pos": 65,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ True  True  True  True  True  True  True  True]\n",
      "[ True  True  True  True  True  True  True  True]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[WARNING] KERNEL(4095486,7fc4402bd740,python):2021-11-07-23:27:27.830.726 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4095486,7fc4402bd740,python):2021-11-07-23:27:27.830.784 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n"
     ]
    }
   ],
   "source": [
    "# 我们需要给共享层一个名称，以便可以引用它的参数\n",
    "shared = nn.Dense(8, 8)\n",
    "net = nn.SequentialCell([nn.Dense(4, 8),\n",
    "                         nn.ReLU(),\n",
    "                         shared,\n",
    "                         nn.ReLU(),\n",
    "                         shared,\n",
    "                         nn.ReLU(),\n",
    "                         nn.Dense(8, 1)])\n",
    "net(X)\n",
    "# 检查参数是否相同\n",
    "print(net[2].weight.data[0] == net[4].weight.data[0])\n",
    "net[2].weight.data[0, 0] = 100\n",
    "# 确保它们实际上是同一个对象，而不只是有相同的值\n",
    "print(net[2].weight.data[0] == net[4].weight.data[0])"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  },
  "rise": {
   "autolaunch": true,
   "enable_chalkboard": true,
   "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
   "scroll": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}